library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
library(plotly)
library(muhaz)
library(ggthemes)
vars <- c("id", "clinic", "status", "time", "prison", "methadone")
addicts <-
read.table(
url(
"https://dmrocke.ucdavis.edu/Class/BST222.2022.Fall/addicts.txt"
),
header = F,
col.names = vars
)
#addicts
#change variables to factors to be used in Cox PH
addicts$clinic <-
factor(addicts$clinic, labels = c("Clinic1", "Clinic2"))
addicts$prison <- factor(addicts$prison, labels = c("No", "Yes"))
#head(addicts)
surv_object <- Surv(time = addicts$time, event = addicts$status)
addicts.cox.strat <-
coxph(surv_object ~ strata(clinic) + prison + methadone, data = addicts)
summary(addicts.cox.strat)
## Call:
## coxph(formula = surv_object ~ strata(clinic) + prison + methadone,
## data = addicts)
##
## n= 238, number of events= 150
##
## coef exp(coef) se(coef) z Pr(>|z|)
## prisonYes 0.389605 1.476397 0.168930 2.306 0.0211 *
## methadone -0.035115 0.965495 0.006465 -5.432 5.59e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## prisonYes 1.4764 0.6773 1.0603 2.0559
## methadone 0.9655 1.0357 0.9533 0.9778
##
## Concordance= 0.651 (se = 0.026 )
## Likelihood ratio test= 33.91 on 2 df, p=4e-08
## Wald test = 32.66 on 2 df, p=8e-08
## Score (logrank) test = 33.33 on 2 df, p=6e-08
addicts.zph.stra <- cox.zph(addicts.cox.strat)
addicts.zph.stra
## chisq df p
## prison 0.0264 1 0.87
## methadone 1.0552 1 0.30
## GLOBAL 1.1302 2 0.57
#For clinic
ggcoxzph(addicts.zph.stra[1], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-4,4))
#For prison
ggcoxzph(addicts.zph.stra[2], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-0.3,0.3))
We can see from the plot that the prison variable has a sinusoidal
pattern of variation. Meanwhile, our methadone variable seems to have
little evidence of a pattern, with a plot that shows random scatter.
addicts.mart <- residuals(addicts.cox.strat, type = "martingale")
addicts.cs <- addicts$status - addicts.mart
#Cumulative hazard
surv.csr <- survfit(Surv(addicts.cs, addicts$status) ~1, type = "fleming-harrington", data = addicts)
#plot(surv.csr, fun ="cumhaz")
cumHazPlot <-
ggsurvplot(
surv.csr,
fun = "cumhaz",
conf.int = TRUE,
palette = c("#581845"),
ggtheme = theme_solarized()
) + ggtitle("Cumulative Hazard for Cox-Snell Residuals")
#ggplotly(cumHazPlot[[1]])
ggplotly(cumHazPlot$plot + geom_abline())
We can see that after time about 2.5, our cumulative hazard does not follow our straight trend line. This gives us the impression that our model lacks in being a good fit for the data.
#This time, without methadone
addicts.coxNoMeth <- coxph(surv_object ~ strata(clinic) + prison, data = addicts)
addicts.mart <- residuals(addicts.coxNoMeth, type = "martingale")
lowessOBJ <- as.data.frame(lowess(addicts$methadone, addicts.mart))
ggplotly(
ggplot() + aes(addicts$methadone, addicts.mart) + geom_point(col = "#FFA000") + labs(x = "Methadone", y = "Martingale Residuals", title = "Martingale Residuals vs. Methadone Dosage") + geom_line(data = lowessOBJ, aes(x = x, y = y), col = "#388E3C") + theme_solarized()
)
Since our lowess line seems pretty linear, we would advise that a transformation of the data is not neccessary.
# With Linear predictor
addicts.predict <- predict(addicts.cox.strat)
addicts.mart <- residuals(addicts.cox.strat, type = "martingale")
#Deviance and df beta
addicts.dev <- residuals(addicts.cox.strat, type = "deviance")
addicts.dfb <- residuals(addicts.cox.strat, type = "dfbeta")
#MArtingale vs Linear Predictor
ggplotly(
ggplot() + aes(
x = addicts.predict,
y = addicts.mart,
label = rownames(addicts)
) + geom_point() + labs(x = "Linear Predictor", y = "Martingale Residual", title = "Martingale Residuals vs Linear Predictor") + theme_solarized()
)
# Deviance vs Linear Predictor
ggplotly(
ggplot() + aes(
x = addicts.predict,
y = addicts.dev,
label = rownames(addicts)
) + geom_point() + labs(x = "Linear Predictor", y = "Deviance Residual", title = "Deviance Residuals vs Linear Predictor") + theme_solarized()
)
# DfBeta vs Linear Predictor
# Clinic
# ggplotly(
# ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 1]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Clinic Type", title = "dfbeta Values for Observation Number by Clinic Type") + theme_solarized()
# )
# Prison
ggplotly(
ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 1]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Prison Status", title = "dfbeta Values for Observation Number by Prison Status") + theme_solarized()
)
#Methadone
ggplotly(
ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 2]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Methadone Level", title = "dfbeta Values for Observation Number by Methadone Level") + theme_solarized()
)
addicts[c(8, 9, 26, 70, 156),]
## id clinic status time prison methadone
## 8 8 Clinic1 0 796 Yes 60
## 9 9 Clinic1 1 892 No 50
## 26 27 Clinic1 0 566 Yes 45
## 70 75 Clinic1 0 905 No 80
## 156 181 Clinic2 1 216 No 100
Observations 8 and 26 were prisoners who were censored observations in our data. 70 was a censored observation who was not a prisoner, but had a very long time. Given their long survival times, they could have felt they no longer needed to be part of the study. These prisoners also had low methadone levels. Observations 9 and 156 were recorded as leaving the clinic. 156, had a very high methadone level however.